import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import math
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier

from sklearn.feature_selection import SequentialFeatureSelector

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import LeaveOneOut

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import cross_validate

import warnings



def HK_skill_score(A, B, C, D):
    """Return the Hanssen-Kuipers skill score of a 2x2 contingency table.

    Computed as ``(A*D - C*B) / ((A + B) * (C + D))``.

    Args:
        A: count of observed events that were predicted (hits).
        B: count of observed events that were not predicted (misses).
        C: count of predicted events that were not observed (false alarms).
        D: count of non-events that were not predicted (correct negatives).

    Returns:
        The skill score, or NaN when ``A + B`` or ``C + D`` is zero (one of
        the observed classes is absent, so the score is undefined).
    """
    try:
        HK = (A * D - C * B) / ((A + B) * (C + D))
    except ZeroDivisionError:
        # Plain-int inputs with an empty observed class: undefined score.
        return float("nan")
    return HK


def Accuracy(A, B, C, D):
    """Return the fraction of correct predictions in a 2x2 contingency table.

    Computed as ``(A + D) / (A + B + C + D)``.

    Args:
        A: count of observed events that were predicted (hits).
        B: count of observed events that were not predicted (misses).
        C: count of predicted events that were not observed (false alarms).
        D: count of non-events that were not predicted (correct negatives).

    Returns:
        The accuracy, or NaN when the table is empty (all counts zero).
    """
    try:
        Acc = (A + D) / (A + B + C + D)
    except ZeroDivisionError:
        # Empty table with plain-int inputs: accuracy is undefined.
        return float("nan")
    return Acc


def Balanced_Accuracy(A, B, C, D):
    """Return the balanced accuracy of a 2x2 contingency table.

    Computed as the mean of ``A / (A + B)`` and ``D / (C + D)``.

    Args:
        A: count of observed events that were predicted (hits).
        B: count of observed events that were not predicted (misses).
        C: count of predicted events that were not observed (false alarms).
        D: count of non-events that were not predicted (correct negatives).

    Returns:
        The balanced accuracy, or NaN when ``A + B`` or ``C + D`` is zero
        (one of the observed classes is absent).
    """
    try:
        Bal_Acc = ((A / (A + B)) + (D / (C + D))) / 2
    except ZeroDivisionError:
        # Plain-int inputs with an empty observed class: undefined score.
        return float("nan")
    return Bal_Acc


# To test 2020 or 2022, load the matching dataset below and switch which dates are added halfway through the script.

# 2022 data: indicator observations and the 'sameday' column of the
# time-to-rain table.
indicator_data = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only.csv')
df_timetorain = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain.csv')
indicator_data = indicator_data.drop(columns=['Date', 'Duck', 'mosquito'])
df_timetorain = df_timetorain.drop(columns=['datetime'])['sameday']

# Optional extra exclusion for the 2022 data:
# del indicator_data['ants_carry_food_to_hole']

# 2020 data only. These assignments replace the 2022 frames loaded above, so
# as written the 2020 data is what the rest of the script sees.
indicator_data = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only_2020.csv')
df_timetorain = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain_2020.csv')
df_timetorain = df_timetorain['sameday']
indicator_data = indicator_data.drop(columns=['Date', 'Duck', 'mosquito', 'Other'])

indicator_data = indicator_data.drop(columns=['ants_carry_food_to_hole'])

# 2020 data for comparison with Janina
...

def _filter_mb_rain_amount(rain_amount, rain_prob, prob_threshold=60, min_amount=1):
    """Return the forecast rain amount with low-confidence values set to 0.

    The amount is replaced by 0 when the rain probability is below
    ``prob_threshold``, or when the probability is missing and the amount is
    below ``min_amount``. In every other case the amount is kept unchanged
    (a missing amount stays NaN).

    Args:
        rain_amount: pandas Series of forecast rain amounts.
        rain_prob: pandas Series of forecast rain probabilities, same index.
        prob_threshold: probabilities strictly below this value zero the amount.
        min_amount: with a missing probability, amounts strictly below this
            value are zeroed.

    Returns:
        A float Series with the filtered amounts.
    """
    zero_mask = (rain_prob < prob_threshold) | (rain_prob.isna() & (rain_amount < min_amount))
    return rain_amount.where(~zero_mask, 0).astype(float)


# Meteoblue forecast
MB_prob_prec = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/MB_prob_prec_total.csv')
MB_prob_prec['datetime'] = pd.to_datetime(MB_prob_prec['datetime'])
MB_prob_prec = MB_prob_prec.rename(columns={'value_x': 'MB_rain_amount', 'value_y': 'MB_rain_prob'})
MB_prob_prec = MB_prob_prec.drop(columns=['cat_x', 'cat_y'])

# A probability of exactly 60 keeps the forecast amount. The previous
# row-wise checks (`< 60` / `> 60`) matched neither branch for that value and
# left the filtered amount as NaN.
MB_prob_prec['MB_rain_amount_filt'] = _filter_mb_rain_amount(
    MB_prob_prec['MB_rain_amount'], MB_prob_prec['MB_rain_prob'])

# Only the filtered amount is used from here on.
MB_prob_prec = MB_prob_prec.drop(columns=['MB_rain_prob', 'MB_rain_amount'])

# Farmer observations, reduced to a binary flag: 1 where the reported value
# is greater than 0, otherwise 0 (a missing value also becomes 0).
farmer_observations = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/Farmer_observations.csv')
farmer_observations['datetime'] = pd.to_datetime(farmer_observations['datetime'])
farmer_observations = farmer_observations.rename(columns={'value': 'Farmer_obs'})
# Cast back to the column's own dtype so the stored 0/1 values keep the type
# they had with the element-wise assignment.
farmer_observations['Farmer_obs'] = (
    farmer_observations['Farmer_obs']
    .gt(0)
    .astype(farmer_observations['Farmer_obs'].dtype)
)

# Farmer predictions (only good farmers from Nakpanzoo, Nabogu or Yapalsi).
# The unnamed first CSV column is parsed as the timestamp and moved to a
# 'datetime' column.
farmer_forecast = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/farmer_forecast_combined.csv')
farmer_forecast['datetime'] = pd.to_datetime(farmer_forecast.pop('Unnamed: 0'))

# Result containers filled by the model loop below.
# One row per indicator sample; probability columns are added per model.
ML_results_ind_kfold_probability = pd.DataFrame(index=np.arange(len(indicator_data)))
# Skill tables start without rows; metric rows are added by label later.
ML_results_ind_kfold = pd.DataFrame(index=np.arange(0))
ML_results_kfold = pd.DataFrame(index=np.arange(0)).rename(index={0: 'Accuracy'})

def _contingency_counts(observed, predicted):
    """Count the cells of a 2x2 contingency table for two binary arrays.

    Returns ``(A, B, C, D)`` as plain ints where
    A = observed 1 / predicted 1, B = observed 1 / predicted 0,
    C = observed 0 / predicted 1, D = observed 0 / predicted 0.
    Entries that are neither 0 nor 1 are not counted.
    """
    observed = np.asarray(observed)
    predicted = np.asarray(predicted)
    A = int(np.sum((observed == 1) & (predicted == 1)))
    B = int(np.sum((observed == 1) & (predicted == 0)))
    C = int(np.sum((observed == 0) & (predicted == 1)))
    D = int(np.sum((observed == 0) & (predicted == 0)))
    return A, B, C, D


# RandomForest max_depth values to try (loop variable b) and the random seeds
# for the indicator model (r) and the combination model (p).
max_depth_values = [2, 3, 4, 5, 6, 7, 8, 9]
list_random = [2, 3, 6, 42]

# Dates of the indicator observations. Only the 'Date' column is read here;
# `indicator_data` itself is left untouched so every iteration trains on the
# same feature set.
# NOTE(review): this file must belong to the same dataset (2020 or 2022) that
# `indicator_data` was loaded from — switch the two lines together.
indicator_dates = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only.csv')['Date']
# indicator_dates = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only_2020.csv')['Date']
if len(indicator_dates) != len(indicator_data):
    raise ValueError(
        'The dates file has %d rows but indicator_data has %d rows; '
        'both must come from the same dataset.' % (len(indicator_dates), len(indicator_data)))
indicator_dates = pd.to_datetime(indicator_dates)

warnings.filterwarnings("ignore", category=UserWarning, message="y_pred contains classes not in y_true")

# NOTE(review): the result column names below contain r and b but not p, so
# for a given (r, b) each later p overwrites the combination-model results of
# the earlier ones, and the indicator model (which does not depend on p) is
# refitted for every p. Confirm whether p should be part of the column name.
for r in list_random:
    for p in list_random:
        for b in max_depth_values:
            print(b)

            # Base estimators referenced by the (currently disabled)
            # VotingClassifier entry below.
            clf2 = RandomForestClassifier(random_state=r, n_estimators=1000)
            clf3 = BernoulliNB()
            clf4 = SVC(probability=True, random_state=r)

            # Stage-1 models: predict same-day rain from the indicators.
            models = []
            # models.append(('BNB', BernoulliNB()))
            models.append(('RF', RandomForestClassifier(random_state=r, n_estimators=100, max_depth=b)))
            # models.append(('SVM', SVC(gamma='auto',probability=True,random_state= r)))
            # models.append(('VC', VotingClassifier(estimators=[('RF', clf2), ('BNB', clf3), ('SVC', clf4)], voting='soft',weights=[2, 1, 1])))
            # models.append(('NN', MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=r, max_iter=5000)))

            for name, model in models:
                X = indicator_data
                Y = df_timetorain
                loocv = LeaveOneOut()
                # Leave-one-out class probabilities; column 0 is used as
                # "no rain" and column 1 as "rain".
                cv_predict = cross_val_predict(model, X, Y, cv=loocv, method='predict_proba')

                # Predict rain only when its probability is strictly larger;
                # a tie counts as "no rain".
                predicted_rain = (cv_predict[:, 1] > cv_predict[:, 0]).astype(int)
                A, B, C, D = _contingency_counts(Y, predicted_rain)

                ML_results_ind_kfold.loc['Accuracy', name + str(r)] = Accuracy(A, B, C, D)
                ML_results_ind_kfold.loc['HK_score', name + str(r)] = HK_skill_score(A, B, C, D)
                ML_results_ind_kfold_probability.loc[:, 'probability_rain_' + name + str(r)] = cv_predict[:, 1]
                ML_results_ind_kfold_probability.loc[:, 'probability_no_rain_' + name + str(r)] = cv_predict[:, 0]
                ML_results_ind_kfold_probability['datetime'] = indicator_dates

            for name, model in models:
                # Indicator probabilities of this model, keyed by date.
                best_ind_results = pd.DataFrame()
                best_ind_results['probability_no_rain'] = ML_results_ind_kfold_probability.loc[:, 'probability_no_rain_' + name + str(r)]
                best_ind_results['probability_rain'] = ML_results_ind_kfold_probability.loc[:, 'probability_rain_' + name + str(r)]
                best_ind_results['datetime'] = pd.to_datetime(ML_results_ind_kfold_probability['datetime'])

                # Combine Meteoblue, indicator and farmer forecasts, then keep
                # only complete rows that also have a farmer observation.
                all_predictions = (
                    MB_prob_prec
                    .merge(best_ind_results, on='datetime', how='outer')
                    .merge(farmer_forecast, on='datetime', how='outer')
                )
                predictions_with_farmer_obs = (
                    all_predictions
                    .merge(farmer_observations, on='datetime', how='inner')
                    .dropna(axis=0, how='any')
                    .reset_index(drop=True)
                )

                farmer_obs = predictions_with_farmer_obs['Farmer_obs'].to_numpy()
                X1 = predictions_with_farmer_obs.drop(columns=['Farmer_obs', 'datetime'])

                # Base estimators referenced by the (currently disabled)
                # VotingClassifier entry below.
                clf2_1 = RandomForestClassifier(random_state=p, n_estimators=100)
                clf3_1 = KNeighborsClassifier()
                clf4_1 = SVC(probability=True, random_state=p)

                # Stage-2 models: predict the farmer observation from the
                # combined forecasts.
                models1 = []
                # models1.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
                # models1.append(('LDA', LinearDiscriminantAnalysis()))
                # models1.append(('KNN', KNeighborsClassifier()))
                models1.append(('RF', RandomForestClassifier(random_state=p, n_estimators=1000, max_depth=b)))
                # models1.append(('SVM', SVC(gamma='auto', random_state=p)))
                # models1.append(('VC',
                #                 VotingClassifier(estimators=[('RF', clf2_1), ('BNB', clf3_1), ('SVC', clf4_1)], voting='soft',
                #                                  weights=[2, 1, 1])))
                # models1.append(('NN', MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=p,
                #                                     max_iter=50000000)))

                for name1, model1 in models1:
                    loocv = LeaveOneOut()
                    # With leave-one-out every fold holds one sample, so each
                    # score is 1 (sample predicted correctly) or 0 (wrongly).
                    cv_score_2_acc = cross_val_score(model1, X1, farmer_obs, cv=loocv, scoring='accuracy')

                    # Recover the predicted class from the correctness flag:
                    # correct -> same as observed, wrong -> the other class.
                    # Scores that are neither 0 nor 1 map to -1 and are
                    # ignored by the counting helper.
                    predicted_obs = np.select(
                        [cv_score_2_acc == 1, cv_score_2_acc == 0],
                        [farmer_obs, 1 - farmer_obs],
                        default=-1)
                    A, B, C, D = _contingency_counts(farmer_obs, predicted_obs)

                    ML_results_kfold.loc['Accuracy', name + name1 + str(r) + str(b)] = np.mean(cv_score_2_acc)
                    ML_results_kfold.loc['HK_score', name + name1 + str(r) + str(b)] = HK_skill_score(A, B, C, D)


# Write the combined-model and indicator-model skill tables to CSV.
# index=False leaves the row labels out of the files.
ML_results_kfold.to_csv('C:/Users/joepb/PycharmProjects/data_storage/2022_new_best_indicator_set_rndm_test_VC_RF_tuned.csv',index=False)
ML_results_ind_kfold.to_csv('C:/Users/joepb/PycharmProjects/data_storage/2022_new_best_indicator_set_rndm_test_VC_RF_tuned_indicator.csv',index=False)
# ML_results_test_VC_and_RF_sensitivity = ML_results.copy()
# NOTE(review): `ML_results` is not assigned anywhere in the visible part of
# this file — confirm it is defined elsewhere; otherwise the statements below
# raise NameError.
# Keep a copy that still contains the 'Accuracy' row, then drop that row.
ML_results_with_acc = ML_results.copy()
ML_results = ML_results.drop('Accuracy')

# Keep a copy that still contains the 'Balanced_accuracy' row, then drop it.
ML_results_with_bal_acc = ML_results.copy()
ML_results = ML_results.drop('Balanced_accuracy')

# NOTE(review): the figure, axes, X and Y created here are not used anywhere
# in this section.
fig = plt.figure()
ax = fig.add_subplot(111)
Y = ML_results_kfold.loc['HK_score']
X = ML_results_kfold.loc['Accuracy']

# Load stored results for the summary statistics. The second read replaces
# the first, so only the '.csv' file is actually used.
ML_results_kfold = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/2022_best_indicator_set_rndm_test_all_comb',index_col='Unnamed: 0')
ML_results_kfold = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/2022_new_best_indicator_set_rndm_test_all_comb.csv')
# The file carries no row labels: row 0 is treated as accuracy, row 1 as HK.
ML_results_kfold = ML_results_kfold.rename(index={0: 'Accuracy', 1: 'HK_score'})

# One-row frames holding, per model combination, the spread and the mean of
# both scores across the matching result columns.
std_dev_rndm_test_HK = pd.DataFrame(index=np.arange(1))
HK_mean = pd.DataFrame(index=np.arange(1))
std_dev_rndm_test_acc = pd.DataFrame(index=np.arange(1))
Acc_mean = pd.DataFrame(index=np.arange(1))
# ML_results_kfold_VCRF = ML_results_kfold.filter(regex='VCRF', axis=1)

for name, model in models:
    for name1, model1 in models1:
        pair_label = name + '-' + name1
        # All result columns whose name contains this model combination.
        pair_scores = ML_results_kfold.filter(regex=name + name1, axis=1)
        pair_hk = pair_scores.loc['HK_score']
        pair_acc = pair_scores.loc['Accuracy']
        std_dev_rndm_test_HK[pair_label] = np.std(pair_hk)
        HK_mean[pair_label] = np.mean(pair_hk)
        std_dev_rndm_test_acc[pair_label] = np.std(pair_acc)
        Acc_mean[pair_label] = np.mean(pair_acc)

down_side_combs = ['RFRF', 'NNSVM', 'SVMNN']

# Scatter plot: standard deviation of the HK score (x) against the mean HK
# score (y), one labelled point per model combination.
fig = plt.figure()
ax = fig.add_subplot(111)
X = std_dev_rndm_test_HK.iloc[0, :]
Y = HK_mean.iloc[0, :]
plt.plot(X, Y, 'bx')  # Plotting data

# Arrow used for labels that are moved away from overlapping points.
label_arrow = dict(arrowstyle="->", color="0.5",
                   shrinkA=5, shrinkB=5,
                   patchA=None, patchB=None,
                   connectionstyle="arc3,rad=0.")
# Combinations whose label is placed below the point.
labels_below_point = ('RF-RF', 'NN-SVM', 'SVM-NN', 'NN-KNN', 'BNB-KNN')

# `storage` remembers x + y of every point labelled so far, so that points
# with an equal sum get their label offset in a different direction.
storage = []
for j in HK_mean.columns:
    v = std_dev_rndm_test_HK[j].iloc[0]
    i = HK_mean[j].iloc[0]
    storage.append(i + v)
    times_seen = storage.count(i + v)

    if times_seen > 2:
        ax.annotate(j, xy=(v, i), xytext=(20, -20), textcoords='offset pixels',
                    arrowprops=dict(label_arrow))
    elif times_seen > 1:
        ax.annotate(j, xy=(v, i), xytext=(-20, 25), textcoords='offset pixels',
                    arrowprops=dict(label_arrow))
    elif j in labels_below_point:
        ax.annotate(j, xy=(v, i), xytext=(-20, -15), textcoords='offset pixels')
    else:
        ax.annotate(j, xy=(v, i), xytext=(-20, 4), textcoords='offset pixels')

# `Axis` was never imported; set the limits through the axes object itself.
ax.set(xlim=(-0.01, 0.12), ylim=(0.35, 0.75))
ax.set_xlabel(xlabel='Standard Deviation', fontsize=12)
ax.set_ylabel(ylabel='HK Skill Score', fontsize=12)
plt.savefig('C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/scatterplot_2022_HK_vs_std_dev.png',bbox_inches= 'tight')
plt.show()



# Plot every model combination separately (accuracy on x, HK score on y) to
# see the spread between its individual result columns.
for name, model in models:
    for name1, model1 in models1:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        # All result columns whose name contains this model combination.
        combo_scores = ML_results_kfold.filter(regex=name + name1, axis=1)
        X = combo_scores.loc['Accuracy']
        Y = combo_scores.loc['HK_score']
        plt.plot(X, Y, 'bx')  # Plotting data

        # Sums x + y of the points labelled so far; equal sums get their
        # label pushed aside with an arrow.
        storage = []
        for j in combo_scores.columns:
            # Unpacks the two rows of the column into v (x) and i (y).
            v, i = combo_scores.loc[:, j].values.tolist()
            storage.append(i + v)
            times_seen = storage.count(i + v)

            arrow = dict(arrowstyle="->", color="0.5",
                         shrinkA=5, shrinkB=5,
                         patchA=None, patchB=None,
                         connectionstyle="arc3,rad=0.")
            if times_seen > 2:
                ax.annotate(j, xy=(v, i), xytext=(20, -20), textcoords='offset pixels',
                            arrowprops=arrow)
            elif times_seen > 1:
                ax.annotate(j, xy=(v, i), xytext=(-20, -40), textcoords='offset pixels',
                            arrowprops=arrow)
            else:
                ax.annotate(j, xy=(v, i), xytext=(-20, 4), textcoords='offset pixels')

        plt.xlabel('Accuracy')
        plt.ylabel("HK skill score")
        # plt.savefig(
        #     'C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/vergelijking_methods/'+(name+name1) +'.png',
        #     bbox_inches='tight')
        plt.show()

# Figure: indicator-model skill (mean HK score and accuracy per model, with
# the standard deviation across result columns as error bars).

ML_results_2022_all_ind = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_forecast_2022_skill.csv',index_col='Unnamed: 0')
ML_results_ind_kfold = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_forecast_2020_skill.csv',index_col='Unnamed: 0')

# One-row summary frames; a column per model is added in the loop below.
std_dev_rndm_test_HK_ind = pd.DataFrame(index=['HK standard deviation'])
HK_ind = pd.DataFrame(index=['Average HK score'])
std_dev_rndm_test_acc_ind = pd.DataFrame(index=['Acc standard deviation'])
Acc_ind = pd.DataFrame(index=['Average Accuracy'])

std_dev_rndm_test_2020 = pd.DataFrame(index=['HK standard deviation'])
HK_ind_2020 = pd.DataFrame(index=['Average HK score'])
std_dev_rndm_test_acc_2020 = pd.DataFrame(index=['Acc standard deviation'])
Acc_ind_2020 = pd.DataFrame(index=['Average Accuracy'])

# NOTE(review): both sets of frames are computed from `ML_results_ind_kfold`
# (the 2020 file), and `ML_results_2022_all_ind` is not used in this section —
# confirm whether the first set was meant to use the 2022 results.
for name, model in models:
    # All result columns whose name contains this model's name.
    model_scores = ML_results_ind_kfold.filter(regex=name, axis=1)
    hk_std = np.std(model_scores.loc['HK_score'])
    hk_mean = np.mean(model_scores.loc['HK_score', :])
    acc_std = np.std(model_scores.loc['Accuracy'])
    acc_mean = np.mean(model_scores.loc['Accuracy', :])

    std_dev_rndm_test_HK_ind.loc['HK standard deviation', name] = hk_std
    HK_ind.loc['Average HK score', name] = hk_mean
    std_dev_rndm_test_acc_ind.loc['Acc standard deviation', name] = acc_std
    Acc_ind.loc['Average Accuracy', name] = acc_mean

    std_dev_rndm_test_2020.loc['HK standard deviation', name] = hk_std
    HK_ind_2020.loc['Average HK score', name] = hk_mean
    std_dev_rndm_test_acc_2020.loc['Acc standard deviation', name] = acc_std
    Acc_ind_2020.loc['Average Accuracy', name] = acc_mean


# my_colors = ['#5e98d9', '#1c7cbd', '#4a4a4a', '#8cce6e']

fig, ax = plt.subplots()
X = HK_ind.columns
X_axis = np.arange(len(X))

my_colors = ['#D2691E','#653700','#7BC8F6','#000080']

hk_means = HK_ind.loc['Average HK score', :].to_list()
acc_means = Acc_ind.loc['Average Accuracy', :].to_list()

# Two bars per model: HK score on the left, accuracy on the right.
bar = ax.bar(X_axis - 0.1, hk_means, 0.2, label='HK score (indicators)', color=my_colors[0], bottom=0)
bar1 = ax.bar(X_axis + 0.1, acc_means, 0.2, label='Accuracy (indicators)', color=my_colors[1], bottom=0)

# Error bars come from the frames filled above. The names used here before
# (`std_dev_rndm_test`, `std_dev_rndm_test_acc`) were respectively undefined
# and a frame without an 'Acc standard deviation' row.
y_error = std_dev_rndm_test_HK_ind.loc['HK standard deviation', :].values
error = ax.errorbar(X_axis - 0.1, hk_means, color='r', yerr=y_error, fmt="o")

y_error1 = std_dev_rndm_test_acc_ind.loc['Acc standard deviation', :].values
error1 = ax.errorbar(X_axis + 0.1, acc_means, color='r', yerr=y_error1, fmt="o")


ax.set_xticks(X_axis)
ax.set_xticklabels(X)
ax.set_yticks(ticks=[-0.1,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8])
ax.set_ylabel('Skill', fontsize=12)
# Panel letter in the top-left corner.
ax.text(0,0.79,s = 'a', fontsize='x-large', verticalalignment='top', fontfamily='serif')
# ax.legend(fontsize=10, loc='upper center', bbox_to_anchor=(0.48, 1.1))
handles = [bar, bar1,error]
labels = ['HK score (Indicators)','Accuracy (Indicators)','Standard deviation']
ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.48, 1.1), ncol=4)

ax.yaxis.grid(True, linestyle='--')

plt.savefig('C:/Users/joepb/OneDrive/Documenten/Wageningen - Uni/Master Thesis/Draft thesis figures and docs/barplot_indicators_random_test_2020',bbox_inches= 'tight')
plt.show()


